********************************************************
* RA Work for Annette and Arvind
* This file created: 10/11/2011
*
*
* For all CDS data pulled from Datastream, 
*     match to TRACE (bond pricing) and FISD (bond ratings)
*
* CMA datasets are extracted from Datastream
*
* Input datasets pulled from WRDS:
* Datasets used:
*       yields_full		(using trace_pull2011sept.sas on WRDS grid)
*       trace_names		(TRACE from WRDS)
*	fisd_mergedissue	(Mergent FISD from WRDS)
*	fisd_ratings		(Mergent FISD from WRDS)
*       fisd_amount_outstanding (Mergent FISD from WRDS)
*       fisd_amt_out_hist       (Mergent FISD from WRDS)
*
*********************************************************
*cd "C:\Users\kpc370\Documents\Kellogg\Year 4\RA - Annette-Arvind\deliverables"
* Session setup: never pause output, work from the project directory on the
* scratch volume, empty memory, then request the memory allocation.
set more off
cd "/scratch/users/kpc370/ra"
clear
set memory 10g



************************************************************
* 1. Import CDS prices - Datastream
* Format date to be MM/DD/YY
* Make first row: date, x_TICKER1, x_TICKER2
* Second row are the tickers
************************************************************
/*
************* Read CMA Sourced-CDS data ********************
//Read in #Y CDS (Note: add a top row with x_mnemonic in the csv file)
//This step takes some time;
local tenor = "1 5 10" 
set more off
foreach i of local tenor {
	insheet using "CMA_`i'Y.csv", comma clear names case
	drop if _n<=2 
	reshape long x_ , i(date) j(cds_ticker) string
	rename x_ cds_`i'y
	destring cds_`i'y, replace
	if "`i'"=="10" {
		local i = "X"
	}
	di "`i'"
	replace cds_ticker  =subinstr(cds_ticker,"S`i'","",1)
	if "`i'"=="X" {
		replace cds_ticker="SXT" if cds_ticker=="TSX"
	}
	*Update date
	gen month=substr(date,1,2) 
	gen day  =substr(date,4,2) 
	gen year =substr(date,7,2) 
	destring month day year, replace
	replace year = year + 2000
	drop date
	gen date = mdy(month,day,year)
	format date %td
	drop month day year
	sort cds_ticker date
	if "`i'"=="X" {
		local i = "10"
	}
	save cds_`i'y, replace
}
*/
* Combine the per-tenor CDS files (1y, 5y, 10y) into one panel keyed on
* cds_ticker and date.  This is an old-style match merge, so every file must
* already be sorted by cds_ticker date (the import step sorts before saving).
use cds_1y, clear
merge cds_ticker date using cds_5y cds_10y, _merge(merge_cds)
* With two using files the merge leaves an overall indicator (merge_cds) plus
* one indicator per using file (merge_cds1, merge_cds2); none is used later.
drop merge_cds merge_cds1 merge_cds2
save cds_cma, replace




*************************************************
* Using CDS data, create list of CMA codes (we call them tickers) and CMA firm names (cdslist.dta)
*************************************************;
* Build one name list per tenor from the Datastream exports.  Only the first
* data row of each csv is kept; it is reshaped to one row per x_ column and
* saved as cdslist1, cdslist5 and cdslist10 (cds_ticker plus cdsname<tenor>).
local tenor = "1 5 10"
set more off
foreach yr of local tenor {
	* The tenor tag removed from the ticker is S1, S5 or SX (X for 10 years)
	local tag = "`yr'"
	if "`yr'"=="10" {
		local tag = "X"
	}
	insheet using "CMA_`yr'Y.csv", comma clear names case
	keep if _n==1
	reshape long x_ , i(date) j(cds_ticker) string
	drop date
	rename x_ cdsname`yr'
	* Remove the first occurrence of the tenor tag from the column stub
	replace cds_ticker = subinstr(cds_ticker,"S`tag'","",1)
	if "`tag'"=="X" {
		* Manual correction for the 10-year SXT series (see notes further down)
		replace cds_ticker="SXT" if cds_ticker=="TSX"
	}
	sort cds_ticker
	duplicates report
	save cdslist`yr', replace
}

* Merge the three tenor name lists and keep only tickers found in all of them.
* The multi-file old-style merge creates match (overall indicator) together
* with match1 and match2 (one indicator per using file).
use cdslist1, clear
merge cds_ticker using cdslist5 cdslist10, _merge(match)
tabulate match

* Tickers found only in the 1-year list
*li cds_ticker cdsname1 cdsname5 cdsname10 if match==1
	//Prudential PLC has only 1 year CDS (British company);
	//Prudential Financial is under cds-ticker PFSL;
drop if match==1

* Tickers found only in the 5-year and/or 10-year lists
*li cds_ticker cdsname1 cdsname5 cdsname10 if match==2
		//The 10 year pull from Datastream gets some other tenors for names containing SX;
		//5 year pull also gets some CMBX;
drop if match==2

// Notes on what the match==2 drop removes:
// Delete CMBX data (tickers start with BX1A1-5 BX2A1-5 BX3A1-5 BX3B1-5 BX3M1-5 BXAJ1-5 BXAM1-5) (OK to drop these)
// BS1 - BS9 BOSTON SCIENTIFIC (ticker=="BSX")                                 (OK to drop these)
// CS1 - CS9 CSX CORP (ticker=="CSX")                                         (OK to drop these)
// TS1,TS3,TS5,TS7 SENSIENT TECHS CORP (ticker=="SXT")                         (OK to drop these)
// Note: the 10-year SXT ticker is manually corrected when the lists are built;
// Hospira (HS) - for 5Y, in sample as HSP until June 26, 2007, then switches to HS 6/29/2007 but only for 5 year
// 	I am manually moving these data from HS to HSP in the CSV file for 5 year tenor;
// Royal Dutch (SHOI) - not US based, so dropping;

/* *Dropped above using match==2 drop;
local droplist = "BS0 BS1 BS2 BS3 BS4 BS5 BS6 BS7 BS8 BS9 CS0 CS1 CS2 CS3 CS4 CS5 CS6 CS7 CS8 CS9 TS1 TS3 TS5 TS7"
foreach i of local droplist {
	drop if cds_ticker =="`i'"
}
local droplist = "BX1A1 BX1A2 BX1A3 BX1A4 BX1A5 BX2A1 BX2A2 BX2A3 BX2A4 BX2A5 BX3A1 BX3A2 BX3A3 BX3A4 BX3A5 BX3B1 BX3B2 BX3B3 BX3B4 BX3B5"
foreach i of local droplist {
	drop if cds_ticker =="`i'"
}
local droplist = "BX3M1 BX3M2 BX3M3 BX3M4 BX3M5 BXAJ1 BXAJ2 BXAJ3 BXAJ4 BXAJ5 BXAM1 BXAM2 BXAM3 BXAM4 BXAM5"
foreach i of local droplist {
	drop if cds_ticker =="`i'"
}
*/

tabulate match 				//866 names have 1, 5, and 10 year CDS;
* Only match, match1 and match2 start with this stub at this point
drop match*

* Strip the Datastream series-description suffix from each tenor's name so the
* names can be compared across tenors, then save the master list (cdslist).
* The patterns are applied one at a time in the order listed; the order is
* kept because variants with a leading blank must be tried before the variants
* without one.
local tenor = "1 5 10"
set more off
foreach yr of local tenor {
	foreach pat in ///
		" SEN `yr'YR CDS - CDS PREM. MID" ///
		" SEN `yr'YRCDS(DISC) - CDS PREM. MID" ///
		" SEN `yr'YRCDS(DISC - CDS PREM. MID" ///
		" SEN `yr'YR CDS(DISC) - CDS PREM. MID" ///
		" SEN `yr'YR CDS(DISC - CDS PREM. MID" ///
		" SEN `yr'YRCDS (DISC - CDS PREM. MID" ///
		" SEN `yr'YR CDS (DISC) - CDS PREM. MID" ///
		" SEN `yr'Y CDS - CDS PREM. MID" ///
		" SEN `yr'Y CDS(DISC - CDS PREM. MID" ///
		" SEN `yr'Y CDS(DISC) - CDS PREM. MID" ///
		" SEN `yr'Y CDS (DISC) - CDS PREM. MID" ///
		" SEN `yr'YCDS(DISC) - CDS PREM. MID" ///
		" SEN `yr'YCDS(DISC - CDS PREM. MID" ///
		" SEN `yr'YCDS (DISC - CDS PREM. MID" ///
		" SEN.`yr'YR CDS - CDS PREM. MID" ///
		" SEN.`yr'YR CDS (DISC) - CDS PREM. MID" ///
		" SEN.`yr'Y CDS - CDS PREM. MID" ///
		"SEN `yr'YR CDS(DISC) - CDS PREM. MID" ///
		"SEN `yr'YR CDS (DISC) - CDS PREM. MID" ///
		"SEN `yr'Y CDS(DISC) - CDS PREM. MID" ///
		"SEN `yr'Y CDS - CDS PREM. MID" ///
		"SEN `yr'Y CDS (DISC) - CDS PREM. MID" ///
		"SEN (DISC) `yr'Y CDS - CDS PREM. MID" ///
		" `yr'YR CDS(DISC) - CDS PREM. MID" ///
		" `yr'YR CDS - CDS PREM. MID" ///
		" `yr'Y CDS(DISC) - CDS PREM. MID" ///
		" SEN`yr'YRCDS(DISC) - CDS PREM. MID" ///
		" SEN.`yr'YRCDS(DISC) - CDS PREM. MID" ///
		" SEN.`yr'YCDS(DISC) - CDS PREM. MID" ///
		" SEN (DISC) `yr'Y CDS - CDS PREM. MID" {
		replace cdsname`yr' = subinstr(cdsname`yr',"`pat'","",.)
	}
}

* List every ticker whose cleaned name still differs between two tenors
list if cdsname1~=cdsname5
list if cdsname1~=cdsname10
list if cdsname5~=cdsname10

duplicates report cds_ticker						//866 distinct cds names
*gen ticker = cds_ticker
*sort ticker
* Saved sorted on cds_ticker because a later old-style merge uses this file
sort cds_ticker
save cdslist, replace




**********************************************************
* Add name back to CDS file
**********************************************************
* Restrict the CDS panel to the sample window, drop tickers with no quotes at
* any tenor inside the window, and attach the cleaned names from cdslist.
* Output: cds_cma2 (only tickers present in cdslist are kept).
use cds_cma, clear
sort cds_ticker date
* Sample window: Jan 1, 2008 through Sept 30, 2010 (missing dates are dropped
* by the second condition because missing compares as larger than any date)
drop if date < mdy(1,1,2008)
drop if date >= mdy(10,1,2010)
	egen index = group(cds_ticker)					// 927 distinct names
	su index
	drop index
*Find if any of these firms have no CDS data over sample;
* has1, has5, has10 = number of non-missing quotes per ticker at each tenor
foreach t in 1 5 10 {
	bysort cds_ticker: egen has`t' = count(cds_`t'y)
}
drop if has1==0 & has5==0 & has10==0
	egen index = group(cds_ticker)					// 826 distinct names
	su index
	drop index
* The old-style match merge below requires the master data to be sorted on
* cds_ticker, and egen may change the sort order, so sort explicitly instead
* of relying on whatever order the commands above happened to leave behind.
sort cds_ticker date
merge cds_ticker using cdslist, _merge(m1)
keep if m1==3			//Only keep CDS data for valid names with non-missing data;
drop m1
save cds_cma2, replace

*Mark stale prices from CDS data;
* Two indicator variables are created (observations are flagged, not dropped):
*   bad  = 1 when the 5y quote and the next four quotes of the same ticker all
*          equal that ticker's quote on Sept 30, 2010 (mlastcds)
*   bad2 = 1 when the 5y quote equals each of the next four quotes of the
*          same ticker
* For Sept 25-30, 2010 a full five-quote look-ahead is not available, so the
* flag is carried forward from the previous observation while the quote stays
* unchanged.
* All lead and lag subscripts are evaluated within ticker.  Without the by
* prefix, the windows at the end of one ticker would read the first quotes of
* the next ticker in the sorted file.
* Missing quotes compare equal to each other, so runs of missing 5y quotes
* are flagged as well (same as before).
use cds_cma2, clear
gen lastcds=cds_5y if date==mdy(9,30,2010)
egen mlastcds=max(lastcds), by(cds_ticker)
sort cds_ticker date

by cds_ticker: gen bad=1 if cds_5y==mlastcds & cds_5y[_n+1]==mlastcds & cds_5y[_n+2]==mlastcds & cds_5y[_n+3]==mlastcds & cds_5y[_n+4]==mlastcds
forvalues d = 25/30 {
	by cds_ticker: replace bad=1 if cds_5y==mlastcds & date==mdy(9,`d',2010) & bad[_n-1]==1
}

by cds_ticker: gen bad2=1 if cds_5y[_n+1]==cds_5y & cds_5y[_n+2]==cds_5y & cds_5y[_n+3]==cds_5y & cds_5y[_n+4]==cds_5y
forvalues d = 25/30 {
	by cds_ticker: replace bad2=1 if cds_5y==cds_5y[_n-1] & date==mdy(9,`d',2010) & bad2[_n-1]==1
}

*drop if bad==1
*drop if bad2==1
drop lastcds mlastcds
	egen index = group(cds_ticker)					// 769 with no dropping 
	su index
	drop index
save cds_cma2_clean, replace


* List of distinct dates in the cleaned CDS panel; crossed with the FISD
* interval data later to expand them to one row per date.
use date using cds_cma2_clean, clear
duplicates drop
save cds_dates, replace


**********************************************************
* Add correct equity ticker/permno for each CDS-date
*    (manually matched using CRSP)
**********************************************************
* Re-save the manually built CMA-to-CRSP mapping sorted on its merge key
use cma_permno_mapping_with_all_dates, clear
sort cma_ticker_avj date
save cma_permno_mapping_with_all_dates, replace

* Attach the mapping to each CDS ticker-date.  Only matched rows with a
* non-empty equity ticker are kept.  Output: cds_cma3, sorted by ticker date.
use cds_ticker date cds_1y cds_5y cds_10y cdsname5 bad bad2 using cds_cma2_clean, clear
order cds_ticker date cds_1y cds_5y cds_10y cdsname5
rename cds_ticker cma_ticker_avj
sort cma_ticker_avj date
merge 1:1 cma_ticker_avj date using cma_permno_mapping_with_all_dates
keep if _merge==3 & ticker!=""
drop _merge
* Count distinct values of each identifier (the max of index is the count):
*   cma_ticker_avj   483 distinct CMA names
*   ticker           488 distinct equity tickers (CRSP)
*   permno           481 distinct permnos (CRSP)
foreach key in cma_ticker_avj ticker permno {
	egen index = group(`key')
	su index
	drop index
}
order ticker permno date 
sort ticker date
save cds_cma3, replace

* Check for equity ticker-dates that carry more than one CDS series.
* There should be none once the two known cases below are removed.
duplicates report ticker date
duplicates tag ticker date, gen(dup)
// CMCSA/CCC both match ticker CMCSA, and T/SBC both match ticker T
// Drop the CMCSA cds series due to some stale pricing in early 2008
// Drop SBC since AT&T is T in 2008
drop if (cma_ticker_avj=="CMCSA" & ticker=="CMCSA") | (cma_ticker_avj=="SBC" & ticker=="T")
* Second report confirms whether any duplicate ticker-dates remain
duplicates report ticker date
drop dup
save cds_cma3, replace






**********************************************************
* Merge CMA data with TRACE data
* Note: yields_full.dta dataset extracted in WRDS grid
*        using trace_pull2011sept.sas
**********************************************************
* Some TRACE price records carry the wrong cusip, so yields are kept only when
* their symbols agree with the trace_names file.  Here the two symbol columns
* of trace_names are renamed so they do not collide with the columns of the
* yields file; capture lets a rerun pass when the rename was already done.
use trace_names, clear
duplicates report cusip_id 
capture rename bond_sym_id names_sym_id
capture rename company_symbol names_symbol
sort cusip_id
save trace_names, replace

* Keep a TRACE yield record only when its cusip is in trace_names and both
* its company symbol and its bond symbol agree with that file.
* Output: yields_clean.
use yields_full, clear
sort cusip_id
merge m:1 cusip_id using trace_names
drop if _merge==1
* bad = 1 when either symbol disagrees, 0 otherwise.  Rows that exist only in
* trace_names have empty yield-side symbols and are therefore flagged when
* the trace_names symbols are non-empty.
gen byte bad = (company_symbol~=names_symbol | bond_sym_id ~= names_sym_id)
su bad
drop if bad==1
drop names_sym_id names_symbol st_date end_date _merge bad
duplicates report cusip_id trd_exctn_dt
save yields_clean, replace

/*
duplicates tag cusip_id trd_exctn_dt, gen(dup)
li cusip_id trd_exctn_dt bond_sym_id company_symbol if dup>0
*/


* Attach CDS data to the TRACE prices by equity ticker and date.
* cma_trace (the merge indicator) is 3 for a bond price with CDS data and 2
* for a CDS ticker-date without any TRACE price; bond prices without CDS data
* (value 1) are dropped.  Output: cma_trace.
use cusip_id company_symbol trd_exctn_dt rptd_pr using yields_clean, clear
gen ticker = company_symbol
drop company_symbol
rename trd_exctn_dt date
rename rptd_pr price
sort ticker date cusip_id
merge m:1 ticker date using cds_cma3, gen(cma_trace)		//use if cds_cma3 has no duplicate ticker-date pairs
drop if cma_trace==1		//Only keep bond data if it matches to CDS information
*drop if cma_trace==2		//Keep CDS-dates that do not match to TRACE price data (indicator: cma_trace==2);
sort ticker date cusip_id
save cma_trace, replace




**********************************************************
* Merge CMA/TRACE data bond characteristics from FISD
* merge uses cusip id to match
**********************************************************
*Make list of all cusips in cma_trace matched file;
* The cusip column is read from the saved cma_trace file rather than from
* whatever happens to be in memory, so this section can be run on its own.
* Output: cusiplist with one row per non-empty cusip, stored as
* complete_cusip to match the FISD key name.
use cusip_id using cma_trace, clear
duplicates drop 		
drop if cusip_id==""
de				// 14,155 bond cusips
rename cusip_id complete_cusip
sort complete_cusip
save cusiplist, replace

* Match the cusip list to the FISD issue file and keep the fixed bond
* characteristics for matched bonds only.  Output: fisdlist, sorted by issue_id.
local bondvars issue_id issuer_id complete_cusip prospectus_issuer_name issuer_cusip maturity security_level offering_amt offering_date
use fisd_mergedissue, clear
sort complete_cusip
merge 1:1 complete_cusip using cusiplist, gen(trace_fisd)
drop if trace_fisd!=3		// 11,261 bonds with matching info
keep `bondvars'
sort issue_id
save fisdlist, replace

* Amount outstanding at each date.
* Step 1 (fisdamt1): stack the historical and current amount-outstanding
* records, restrict them to bonds in fisdlist, and turn each record into an
* interval that starts at effective_date and ends at end_date.
use fisd_amt_out_hist, clear
append using fisd_amount_outstanding
drop if effective_date == .
merge m:1 issue_id using fisdlist, gen(merge_amt)
drop if merge_amt==1
	egen index = group(issue_id)					// 11,261 distinct bonds
	su index
	drop index
* end_date is the effective_date of the next record of the same bond; the
* within-bond ordering by effective_date is stated explicitly here.
bysort issue_id (effective_date): gen end_date = effective_date[_n+1]		//effective_date (end_date) marks beginning (end) of the amt_outstanding
* The last record of each bond has no successor, so it is closed at Dec 31, 2010
replace end_date = mdy(12,31,2010) if end_date ==.
format end_date %td
order issue_id action_type effective_date end_date amount_outstanding maturity
save fisdamt1, replace

* Step 2 (fisdamt2): expand to every CDS date (Jan 1, 2008 to Sept 30, 2010)
* and keep the record whose interval contains the date (start inclusive,
* end exclusive).
keep issue_id effective_date end_date amount_outstanding
compress issue_id effective_date end_date amount_outstanding
cross using cds_dates
drop if date < effective_date | date >= end_date
sort issue_id date effective_date
order issue_id date
duplicates report issue_id date
save fisdamt2, replace


* Moody's rating at each date.
* Step 1 (fisdrate_mr): keep Moody's records (rating_type MR; SPR is S&P),
* restrict them to bonds in fisdlist, and turn each record into an interval
* that starts at rating_date and ends at end_date.
use fisd_ratings, clear
keep if rating_type == "MR"
merge m:1 issue_id using fisdlist, gen(merge_amt)
drop if merge_amt==1
	egen index = group(issue_id)						
	su index
	drop index
* end_date is the rating_date of the next record of the same bond
bysort issue_id (rating_date): gen end_date = rating_date[_n+1]		//rating_date (end_date) marks beginning (end) of the rating
* The last rating of each bond has no successor, so it is closed at Dec 31, 2010
replace end_date = mdy(12,31,2010) if end_date ==.
format end_date %td
save fisdrate_mr, replace

* Step 2 (fisd_mr): expand to every CDS date (Jan 1, 2008 to Sept 30, 2010)
* and keep the rating whose interval contains the date.  Timer 1 measures
* the cross.
keep issue_id rating_date end_date rating
compress issue_id rating_date end_date rating
timer clear 1
timer on 1
cross using cds_dates		//took about 6 minutes
timer off 1
timer list 1
drop if date < rating_date | date >= end_date
sort issue_id date rating_date
order issue_id date
duplicates report issue_id date
rename rating moodys
save fisd_mr, replace



* Timer 1 measures the whole S&P section
timer clear 1
timer on 1


* S&P rating at each date.
* Step 1 (fisdrate_sp): keep S&P records (rating_type SPR; MR is Moody's),
* restrict them to bonds in fisdlist, and turn each record into an interval
* that starts at rating_date and ends at end_date.
use fisd_ratings, clear
keep if rating_type == "SPR"
merge m:1 issue_id using fisdlist, gen(merge_amt)
drop if merge_amt==1
	egen index = group(issue_id)						
	su index
	drop index
* end_date is the rating_date of the next record of the same bond
bysort issue_id (rating_date): gen end_date = rating_date[_n+1]		//rating_date (end_date) marks beginning (end) of the rating
* The last rating of each bond has no successor, so it is closed at Dec 31, 2010
replace end_date = mdy(12,31,2010) if end_date ==.
format end_date %td
save fisdrate_sp, replace

* Step 2 (fisd_sp): expand to every CDS date (Jan 1, 2008 to Sept 30, 2010)
* and keep the rating whose interval contains the date.
keep issue_id rating_date end_date rating
compress issue_id rating_date end_date rating
cross using cds_dates
drop if date < rating_date | date >= end_date
sort issue_id date rating_date
order issue_id date
duplicates report issue_id date
rename rating sp
save fisd_sp, replace

timer off 1
timer list 1



* Combine the dated FISD pieces (amount outstanding, Moody's, S&P) into one
* bond-date file, fill it out to every bond-date combination, and add the
* fixed bond characteristics.  Output: fisd_all, sorted by complete_cusip date.
use issue_id date amount_outstanding using fisdamt2, clear
* Only the rating column is brought in from each rating file
merge 1:1 issue_id date using fisd_mr, keepusing(moodys) nogenerate
merge 1:1 issue_id date using fisd_sp, keepusing(sp) nogenerate
*duplicates report issue_id date
save temp1, replace
* fillin adds the missing issue_id-date combinations (marked by _fillin).
* Timer 1 measures it.
timer clear 1
timer on 1
fillin issue_id date
timer off 1
timer list 1
save temp1, replace

*Add constant bond characteristics from fisdlist;
merge m:1 issue_id using fisdlist
order complete_cusip date moodys sp amount_outstanding offering_amt maturity
drop if date == mdy(10,1,2010)
sort complete_cusip date
save fisd_all, replace



* Final assembly, part 1 (all_cusip): rows of cma_trace that have a cusip are
* matched to the FISD bond-date file.
use cma_trace, clear
rename cusip_id complete_cusip
* Repeated cusip-date pairs are only acceptable for rows without a cusip
duplicates tag complete_cusip date, gen(dup)
assert complete_cusip=="" if dup > 0
drop if complete_cusip==""
sort complete_cusip date
merge 1:1 complete_cusip date using fisd_all, gen(m1)
keep if inlist(m1, 1, 3)			//Only keep observations with CDS info;
save all_cusip, replace


* Final assembly, part 2 (cma_trace_fisd): add back the CDS rows that have no
* cusip, that is, CDS ticker-dates without any TRACE price.
use if cusip_id=="" using cma_trace, clear
rename cusip_id complete_cusip
append using all_cusip
order date ticker complete_cusip cds_1y cds_5y cds_10y price cma_trace moodys sp amount_outstanding
sort ticker date complete_cusip
save cma_trace_fisd, replace


























